library(openxlsx)
library(tidyverse)
library(reshape2)
library(picante)
library(tidyr)
library(ggpubr)
library(mice)
source("custom_functions.R")

Functions

clinical_boxplot <- function(metadata,variable){
  metadata_var <- metadata %>% dplyr::select(all_of(c("PatientID",
                                                       variable, "Group"))) %>% as.data.frame()
  metadata_var[,variable] <- as.numeric(metadata_var[,variable])
  metadata_var_without_na <- metadata_var[!is.na(metadata_var[,variable]),]
  colors <- c("#309f87","#f9c675","#F08080","#A00000")
  if (length(unique(metadata_var_without_na$Group))==1) colors <- c("#f9c675")
  if (!"healthy" %in% unique(metadata_var_without_na$Group)) colors <- c("#f9c675","#F08080","#A00000")
  
p <- ggplot(metadata_var_without_na) + 
  geom_boxplot(aes(x=Group, y=!!sym(variable)),outliers = FALSE) + 
    geom_jitter(width = 0.2,height = 0,aes(x=Group, y=!!sym(variable), color=Group),size=2) +
  scale_fill_manual(values=colors) + 
scale_color_manual(values=colors) + 
   theme_minimal() + 
 theme(panel.border = element_rect(color = "black", fill = NA, size = 0))
return(p)
}
clinical_correlation <- function(metadata,variable,level,
                                 segment="ileum"){
  set.seed(123)
  level <- tolower(level)
  
  metadata_var <- metadata %>% 
    dplyr::select(all_of(c("SampleID", "PatientID",
                paste0("dys_filtered_",level),
                variable, "Group")))
  
  metadata_var[,variable] <- as.numeric(metadata_var[,variable])
  metadata_var <- drop_na(metadata_var)
  
  if (segment=="colon"){
    n_iterations <- 100
    corrs <- list()
    for (i in 1:n_iterations){
      random_sampled_df <- metadata_var %>%
      group_by(PatientID) %>%  # Group by PatientID
      slice_sample(n = 1) %>% # Randomly pick one row per PatientID
      ungroup()
      
      corr <- cor.table(
        random_sampled_df[,c(variable,paste0("dys_filtered_",level))], 
        cor.method="spearman"
        )
      
      corr$P <- corr$P[2]
      corr$r <- corr$r[2]
      corrs[[i]] <- corr
    }
    
    r_values_list <- unlist(map(corrs, ~ .x$r))  # List of r matrices
    p_values_list <- unlist(map(corrs, ~ .x$P))  # List of P matrices
    mean_r <- round(median(r_values_list),2)
    
    p_value <- quantile(p_values_list,0.9)
    corr <- list()
    corr$r <- mean_r
    corr$P <- p_value
  } else {
    corr <- cor.table(
      metadata_var[,c(variable,paste0("dys_filtered_",level))],
      cor.method="spearman")
    
    corr$P <- corr$P[2]
    corr$r <- round(corr$r[2],2)
  }
  
  return(corr)
}

clinical_correlation_abundances <- function(metadata,variable,taxon,level,
                                            segment="ileum"){
  set.seed(123)
  level <- tolower(level)
  
  metadata_var <- metadata %>% dplyr::select(all_of(c("SampleID", "PatientID",
                                                       variable,taxon,
                                                      "Group")))
  metadata_var[,variable] <- as.numeric(metadata_var[,variable])
  metadata_var <- drop_na(metadata_var)
  
  if (segment=="colon"){
    n_iterations <- 100
    corrs <- list()
    for (i in 1:n_iterations){
      random_sampled_df <- metadata_var %>%
      group_by(PatientID) %>%  # Group by PatientID
      slice_sample(n = 1) %>% # Randomly pick one row per PatientID
      ungroup()
      
      corr <- cor.table(random_sampled_df[,c(variable,taxon)], cor.method="spearman")
      corr$P <- corr$P[2]
      corr$r <- corr$r[2]
      corrs[[i]] <- corr
    }
     r_values_list <- unlist(map(corrs, ~ .x$r))  # List of r matrices
    p_values_list <- unlist(map(corrs, ~ .x$P))  # List of P matrices
    mean_r <- round(median(r_values_list),2)
    
    p_value <- quantile(p_values_list,0.9)
    #min_p <- min(p_values_list)
    #min_p <- round(min_p,2)
    #min_p <- ifelse(min_p<=0.05,"< 0.05",ifelse(min_p<=0.01,"< 0.01",ifelse(min_p<=0.001,"< 0.001",paste0("= ",min_p))))
    
    #max_p <- max(p_values_list)
    #max_p <- round(max_p,2)
    #max_p <- ifelse(max_p<=0.05,"< 0.05",ifelse(max_p<=0.01,"< 0.01",ifelse(max_p<=0.001,"< 0.001",max_p)))
    
    #if (min_p==max_p) combined_df <- min_p
    #else {
    #  max_p <- round(quantile(corr$P,0.9),2)
    #  combined_df <- ifelse(max_p<=0.05,"< 0.05",ifelse(max_p<=0.01,"< 0.01",ifelse(max_p<=0.001,"< 0.001",paste("=",max_p) )))
    #}
    corr <- list()
    corr$r <- mean_r
    #corr$P <- combined_df
    corr$P <- p_value
  } else {
    corr <- cor.table(metadata_var[,c(variable,taxon)], cor.method="spearman")
    #max_p <- round(corr$P[2],2)
    #max_p <- 
    #  max_p <- ifelse(max_p<=0.05,"< 0.05",ifelse(max_p<=0.01,"< 0.01",ifelse(max_p<=0.001,"< 0.001",paste0("= ",max_p))))
    #corr$P <- max_p
    corr$r <- round(corr$r[2],2)
    corr$P <- corr$P[2]
  }
  
  return(corr)
}

clinical_scatter <- function(corr,metadata,variable,level){
  level <- tolower(level)
  df_for_scatter_plot <- metadata %>% 
    dplyr::select(all_of(c("SampleID", "PatientID",
                           paste0("dys_filtered_",level),
                           variable, "Group")))
  
  df_for_scatter_plot[,variable] <- as.numeric(df_for_scatter_plot[,variable])
  
  df_for_scatter_plot <- drop_na(df_for_scatter_plot)
  
  colors <- c("#309f87","#f9c675","#F08080","#A00000")
  if (!"healthy" %in% unique(df_for_scatter_plot$Group)) colors <- c("#f9c675","#F08080","#A00000")
  
  r_corr <- corr$r
  p_corr <- corr$P
  p_corr <- ifelse(p_corr < 0.001, "<0.001", ifelse(p_corr < 0.01, "<0.01", ifelse(p_corr < 0.05, "<0.05", paste0("=",round(p_corr,3)))))
  
  x_var <- paste0("dys_filtered_",level)
  x_position <- (max(df_for_scatter_plot[,x_var])) - 0.2*(max(df_for_scatter_plot[,x_var]) - min(df_for_scatter_plot[,x_var]))
  
  y_position <- max(df_for_scatter_plot[,variable]) - 0.05*(max(df_for_scatter_plot[,variable]) - min(df_for_scatter_plot[,variable]))
  
  labels <- paste0("r = ",r_corr, "\\np ",p_corr)
  p <- ggplot(df_for_scatter_plot) + 
  geom_point(aes(x=!!sym(x_var), y=!!sym(variable), color=Group)) + 
  geom_smooth(aes(x=!!sym(x_var), y=!!sym(variable)),method=lm, se=TRUE) +
    geom_text(aes(x=x_position, y=y_position),
                  label=gsub("\\\\n", "\n", labels),hjust = 0) + 
  scale_color_manual(values=colors) + 
  theme_minimal() + 
 theme(panel.border = element_rect(color = "black", fill = NA, size = 0)) + 
  xlab("MDI")
  return(p)
}

clinical_scatter_abundances <- function(corr,metadata,variable,taxon,level,
                                        size=4){
  level <- tolower(level)
  df_for_scatter_plot <- metadata %>% dplyr::select(all_of(c("SampleID", "PatientID",
                                                       taxon,
                                                       variable, "Group")))
  df_for_scatter_plot[,variable] <- as.numeric(df_for_scatter_plot[,variable])
  df_for_scatter_plot <- drop_na(df_for_scatter_plot)
  colors <- c("#309f87","#f9c675","#F08080","#A00000")
  if (!"healthy" %in% unique(df_for_scatter_plot$Group)) colors <- c("#f9c675","#F08080","#A00000")
  
  x_var <- taxon
  r_corr <- corr$r
  p_corr <- corr$P
  
  x_position <- (max(df_for_scatter_plot[,x_var])) - 0.2*(max(df_for_scatter_plot[,x_var]) - min(df_for_scatter_plot[,x_var]))
  y_position <- max(df_for_scatter_plot[,variable]) - 0.05*(max(df_for_scatter_plot[,variable]) - min(df_for_scatter_plot[,variable]))
  labels <- paste0("r = ",r_corr, "\\np ",p_corr)
  p <- ggplot(df_for_scatter_plot) + 
  geom_point(aes(x=!!sym(x_var), y=!!sym(variable), color=Group)) + 
  geom_smooth(aes(x=!!sym(x_var), y=!!sym(variable)),method=lm, se=TRUE) +
    geom_text(aes(x=x_position, y=y_position,label=gsub("\\\\n", "\n", labels)),size=size) + 
  scale_color_manual(values=colors) + 
  theme_minimal() + 
 theme(panel.border = element_rect(color = "black", fill = NA, size = 0))
  return(p)
}
clinical_contingency <- function(metadata,variable){
  metadata_var <- metadata %>% dplyr::select(all_of(c("PatientID",
                                                       variable, "Group"))) %>% as.data.frame()
  metadata_var_without_na <- metadata_var[!is.na(metadata_var[,variable]),]
  cont_table <- as.data.frame(table(metadata_var_without_na$Group, metadata_var_without_na[,variable]))
  
  p <- ggplot(cont_table, aes(x = Var1, y = Var2, fill = Freq)) +
  geom_tile(color = "white") +  # Tile background color
  geom_text(aes(label = Freq), color = "black", size = 6) +  # Add numbers
  scale_fill_gradient(low = "white", high = "blue") +  # Color gradient
  labs(title = "Contingency Table with Counts", 
       x = "Group", y = variable) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
  return(p)
}

clinical_boxplot_categorical <- function(metadata, variable,level){
  level=tolower(level)
  metadata_var <- metadata %>% dplyr::select(all_of(c("SampleID", "PatientID",
                                                       paste0("dys_filtered_",level),
                                                       variable, "Group")))
  metadata_var <- drop_na(metadata_var)
  metadata_var[,variable] <- as.factor(metadata_var[,variable])

  y_var <- paste0("dys_filtered_",level)
  p <- ggplot(metadata_var) + 
    geom_boxplot(aes(x=!!sym(variable), y=!!sym(y_var)),outliers = FALSE) + 
      geom_jitter(width = 0.2,height = 0,aes(x=!!sym(variable), y=!!sym(y_var),
                                             color=Group),size=2) +
    scale_fill_manual(values=c("#f9c675","#F08080","#A00000")) + 
  scale_color_manual(values=c("#f9c675","#F08080","#A00000")) + 
    theme_minimal() + 
 theme(panel.border = element_rect(color = "black", fill = NA, size = 0))
  return(p)
p
}
subprepare_for_heatmap <- function(corrs_segment,MDI=TRUE){
    corrs_segment <- corrs_segment[!grepl("_log_",names(corrs_segment))]
    names(corrs_segment) <- gsub("score_","",names(corrs_segment))
    # Example list structure for demonstration
    # Initialize empty lists for r and P values
    r_values <- list()
    p_values <- list()
    
    # Extract segment and clinical variables
    for (name in names(corrs_segment)) {
      if (MDI) {
        clinical_var <- sub("^(.*?_){2}", "",  name)
        if (all(grepl("ileum",names(corrs_segment)))) {
        segment <- "MDI terminal ileum"
      } else if (all(grepl("colon",names(corrs_segment)))) {
        segment <- "MDI colon"
      } else (message("PROBLEM with segment"))
        
      }
      else {
        segment <- sub("^(.*?_){3}", "",  name)
        clinical_var <- sub(segment,"", sub("^(.*?_){2}", "",  name))
        clinical_var <- gsub("_","",clinical_var)
      }
  

      # Fill the r and P lists
      if (!is.null(corrs_segment[[name]]$r)) {
        r_values[[segment]][clinical_var] <- corrs_segment[[name]]$r
      }
      
      if (!is.null(corrs_segment[[name]]$P)) {
        p_values[[segment]][clinical_var] <- corrs_segment[[name]]$P
      }
    }

    # Convert the lists to data frames
    r_df <- do.call(rbind, lapply(r_values, function(x) setNames(as.data.frame(t(x)), names(x))))
    p_df <- do.call(rbind, lapply(p_values, function(x) setNames(as.data.frame(t(x)), names(x))))
    
    # Add row names as segment names
    rownames(r_df) <- names(r_values)
    rownames(p_df) <- names(p_values)
    
    if (MDI) pd_df_corrected <- as.data.frame(p.adjust(p_df,method="BH"))
    else pd_df_corrected <- as.data.frame(apply(p_df,2,function(x) p.adjust(x, method="BH")))
    p_df_sig <- pd_df_corrected
    p_df_sig[,] <- ""
    
    p_df_sig[pd_df_corrected < 0.05] <- "*"
    p_df_sig[pd_df_corrected < 0.01] <- "**"
    p_df_sig[pd_df_corrected < 0.001] <- "***"
    
    if (MDI) p_df_sig <- t(p_df_sig) %>% 
      as.data.frame()  %>% 
      `rownames<-`(segment)
    
    return(list(p_df_sig,r_df))
}

prepare_for_heatmap <- function(corrs_ileum,corrs_colon){

  ileum_list <- subprepare_for_heatmap(corrs_ileum)
  colon_list <- subprepare_for_heatmap(corrs_colon)

  p_df_sig <- rbind(ileum_list[[1]],colon_list[[1]])
  r_df <- rbind(ileum_list[[2]],colon_list[[2]])

  return(list(p_df_sig,r_df))
  
}

Import clinical data

# clinical metadata
metadata_clinical <- read.csv("../../data/clinical_data/clinical_metadata_cz.csv")
metadata_clinical$PatientID <- as.character(metadata_clinical$PatientID)

# DYSBIOSIS
metadata_dysbiosis <- read.csv("../../data/clinical_data/dysbiosis_metadata.csv") %>%
  dplyr::filter(Country=="CZ")

# ALPHA DIVERSITY
metadata_alpha_ileum <- read.csv(
  "../results/Q1/alpha_diversity/alpha_indices_terminal_ileum.csv") %>%
  dplyr::filter(Country=="CZ")

metadata_alpha_colon <- read.csv(
  "../results/Q1/alpha_diversity/alpha_indices_colon.csv") %>%
  dplyr::filter(Country=="CZ")

metadata_alpha <- rbind(metadata_alpha_ileum,metadata_alpha_colon) %>%
  mutate(PatientID=Patient) %>%
  dplyr::select(-c(Patient, Group))

# MERGING
metadata_cz <- full_join(metadata_clinical, metadata_dysbiosis, by=c("SampleID","Matrix","PatientID","Group","Country"))

metadata_cz <- full_join(metadata_cz, metadata_alpha, by=c("SampleID","PatientID","Country"))

metadata_cz$Group <- factor(metadata_cz$Group,levels = c("healthy","pre_ltx","non-rPSC","rPSC"))
variables <- c("PatientID", "Group",
               "Bilirubin", "Albumin",
               "ALP", "Platelets",
               "Fecal.calprotectin",
               "MAYO_PSC","AOM_score", 
               "APRI_score",
               "FIB4_score",
               "Nancy_max","eMAYO","MAYO_dai",
                     "Creatinine",
                     "Na",
                     "ALT",
                     "AST",
                     "GGT", "INR")
metadata_cz$Fecal.calprotectin[metadata_cz$Group=="healthy"] <- NA
metadata_cz$INR[metadata_cz$Group=="healthy"] <- NA
metadata_cz$Fecal.calprotectin[metadata_cz$Fecal.calprotectin==">6000"] <- 6000
metadata_cz$Fecal.calprotectin <- as.numeric(metadata_cz$Fecal.calprotectin)
  
metadata_ileum <- metadata_cz %>% subset(Matrix=="TI") %>% as.data.frame()
metadata_colon <- metadata_cz %>% subset(Matrix!="TI") %>% as.data.frame()

# TO DO!!!!!!!!!!!!!! nefunguje ako ma
metadata_for_boxplots <- metadata_cz[,variables] %>%
  group_by(PatientID) %>%
  distinct(PatientID, .keep_all = TRUE) %>%
  as.data.frame()

Import microbiome data

Data Import

Importing ASV, taxa and metadata tables for both Czech and Norway samples.

Czech

path = "../../data/analysis_ready_data/ikem/"
asv_tab_ikem <- as.data.frame(fread(file.path(path,"asv_table_ikem.csv"),
                                    check.names = FALSE))
taxa_tab_ikem <- as.data.frame(fread(file.path(path,"taxa_table_ikem.csv"),
                                     check.names = FALSE))
metadata_ikem <- as.data.frame(fread(file.path(path,"metadata_ikem.csv"),
                                     check.names = FALSE))

Spliting to segments

Terminal ileum

ileum_data <- merging_data(asv_tab_1=asv_tab_ikem,
                           asv_tab_2=NULL,
                           taxa_tab_1=taxa_tab_ikem,
                           taxa_tab_2=NULL,
                           metadata_1=metadata_ikem,
                           metadata_2=NULL,
                           segment="TI",Q="clinical")
## Removing 1498 ASV(s)
ileum_asv_tab <- ileum_data[[1]]
ileum_taxa_tab <- ileum_data[[2]]
ileum_metadata <- ileum_data[[3]]

Colon

colon_data <- merging_data(asv_tab_1=asv_tab_ikem,
                           asv_tab_2=NULL,
                           taxa_tab_1=taxa_tab_ikem,
                           taxa_tab_2=NULL,
                           metadata_1=metadata_ikem,
                           metadata_2=NULL,
                           segment="colon",Q="clinical")
## Removing 739 ASV(s)
colon_asv_tab <- colon_data[[1]]
colon_taxa_tab <- colon_data[[2]]
colon_metadata <- colon_data[[3]]

Filtering

Rules: - prevalence > 5% (per group) - nearZeroVar with default settings - sequencing depth > 5000 - taxonomic assignment at least order

Sequencing depth

data_filt <- seq_depth_filtering(ileum_asv_tab,
                                 ileum_taxa_tab,
                                 ileum_metadata,
                                 seq_depth_threshold = 10000)
## Removing 73 ASV(s)
filt_ileum_asv_tab <- data_filt[[1]]; alpha_ileum_asv_tab <- filt_ileum_asv_tab
filt_ileum_taxa_tab <- data_filt[[2]]; alpha_ileum_taxa_tab <- filt_ileum_taxa_tab
filt_ileum_metadata <- data_filt[[3]]; alpha_ileum_metadata <- filt_ileum_metadata

seq_step <- dim(filt_ileum_asv_tab)[1]

NearZeroVar

data_filt <- nearzerovar_filtering(filt_ileum_asv_tab, 
                                   filt_ileum_taxa_tab,
                                   filt_ileum_metadata)

filt_ileum_asv_tab <- data_filt[[1]]
filt_ileum_taxa_tab <- data_filt[[2]]
nearzero_step <- dim(filt_ileum_asv_tab)[1]

DYSBIOSIS INDEX

Ileum

All indices

metadata_ileum_melted <- melt(metadata_ileum %>% 
                                dplyr::select("Group", 
                                              "dys_unfiltered_asv",
                                              "dys_unfiltered_genus",
                                              "dys_filtered_asv",
                                              "dys_filtered_genus"))
## Warning: c("The melt generic in data.table has been passed a data.frame and will attempt to redirect to the relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(metadata_ileum %>% dplyr::select(\"Group\", \"dys_unfiltered_asv\", ). In the next version, this warning will become an error.", 
## "The melt generic in data.table has been passed a data.frame and will attempt to redirect to the relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(    \"dys_unfiltered_genus\", \"dys_filtered_asv\", \"dys_filtered_genus\")). In the next version, this warning will become an error."
## )
## Using Group as id variables
p_il <- ggplot(metadata_ileum_melted) + 
  geom_boxplot(aes(x=Group, y=value),outliers = FALSE) + 
  geom_jitter(width = 0.2,height = 0,aes(x=Group, y=value, color=Group), 
              size=2) +
  facet_wrap(~variable, ncol = 4,scales = "free") + 
  theme_minimal() + 
 theme(panel.border = element_rect(color = "black", fill = NA, size = 0)) + 
  scale_fill_manual(values=c("#309f87","#f9c675","#F08080","#A00000")) + 
  scale_color_manual(values=c("#309f87","#f9c675","#F08080","#A00000"))

p_il    
## Warning: Removed 12 rows containing non-finite outside the scale range (`stat_boxplot()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range (`geom_point()`).

FILTERED GENUS

dys_limit <- max(metadata_ileum$dys_filtered_genus,na.rm = TRUE) + 0.6*max(metadata_ileum$dys_filtered_genus,na.rm = TRUE)
dys_min_limit <-  min(metadata_ileum$dys_filtered_genus)

p_il_dys_genus <- ggplot(metadata_ileum %>%
                           mutate(`MDI`=dys_filtered_genus)) + 
  geom_boxplot(aes(x=Group, y=`MDI`),
               outliers = FALSE) + 
  geom_jitter(width = 0.3,height = 0,
              aes(x=Group, y=`MDI`, color=Group,shape=Country),
              size=1.5) +
  theme_minimal() + 
 theme(panel.border = element_rect(color = "black", fill = NA, size = 0))  + 
  ylim(dys_min_limit,dys_limit) + 
  scale_fill_manual(values=c("#309f87","#f9c675","#F08080","#A00000")) + 
  scale_color_manual(values=c("#309f87","#f9c675","#F08080","#A00000")) + 
  theme(axis.text.x = element_text(angle = 45,face = "bold",vjust = 0.5))

p_il_dys_genus
## Warning: Removed 6 rows containing non-finite outside the scale range (`stat_boxplot()`).
## Warning: Removed 6 rows containing missing values or values outside the scale range (`geom_point()`).

Colon

All indices

colon_metadata_melted <- melt(metadata_colon %>% 
                                dplyr::select("Group", 
                                              "dys_unfiltered_asv",
                                              "dys_unfiltered_genus",
                                              "dys_filtered_asv",
                                              "dys_filtered_genus"))
## Warning: c("The melt generic in data.table has been passed a data.frame and will attempt to redirect to the relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(metadata_colon %>% dplyr::select(\"Group\", \"dys_unfiltered_asv\", ). In the next version, this warning will become an error.", 
## "The melt generic in data.table has been passed a data.frame and will attempt to redirect to the relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(    \"dys_unfiltered_genus\", \"dys_filtered_asv\", \"dys_filtered_genus\")). In the next version, this warning will become an error."
## )
## Using Group as id variables
p_col <- ggplot(colon_metadata_melted) + 
  geom_boxplot(aes(x=Group, y=value),outliers = FALSE) + 
  geom_jitter(width = 0.2,height = 0,
              aes(x=Group, y=value, color=Group),
              size=2) +
  facet_wrap(~variable, ncol = 4,scales = "free") + 
  theme_minimal() + 
 theme(panel.border = element_rect(color = "black", fill = NA, size = 0)) +
  scale_fill_manual(values=c("#309f87","#f9c675","#F08080","#A00000")) + 
  scale_color_manual(values=c("#309f87","#f9c675","#F08080","#A00000"))

p_col  
## Warning: Removed 12 rows containing non-finite outside the scale range (`stat_boxplot()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range (`geom_point()`).

FILTERED GENUS

dys_limit <- max(metadata_colon$dys_filtered_genus,na.rm = TRUE) + 0.6*max(metadata_colon$dys_filtered_genus,na.rm = TRUE)
dys_min_limit <-  min(metadata_colon$dys_filtered_genus)

p_col_dys_genus <- ggplot(metadata_colon %>%
                            mutate(`MDI`=dys_filtered_genus)) + 
  geom_boxplot(aes(x=Group, y=`MDI`),outliers = FALSE) + 
  geom_jitter(width = 0.3,height = 0,
              aes(x=Group, y=`MDI`, color=Group,shape=Country),
              size=1) +
  theme_minimal() + 
 theme(panel.border = element_rect(color = "black", fill = NA, size = 0))+
  ylim(dys_min_limit,dys_limit) + 
  scale_fill_manual(values=c("#309f87","#f9c675","#F08080","#A00000")) + 
  scale_color_manual(values=c("#309f87","#f9c675","#F08080","#A00000")) + 
  theme(axis.text.x = element_text(angle = 45,face = "bold",vjust = 0.5))

p_col_dys_genus
## Warning: Removed 6 rows containing non-finite outside the scale range (`stat_boxplot()`).
## Warning: Removed 6 rows containing missing values or values outside the scale range (`geom_point()`).

Linear models

Ileum

results_model <- pairwise.lm(
  formula = "dys_filtered_genus ~ Group",
  factors=metadata_ileum$Group,
  data=metadata_ileum)

knitr::kable(results_model[[1]][,c("Estimate", "p.adj","sig")],
             digits=3,caption="Results of linear modeling")
Results of linear modeling
Estimate p.adj sig
healthy vs Groupnon-rPSC 17.097 0.000 ***
healthy vs GrouprPSC 22.812 0.000 ***
healthy vs Grouppre_ltx 25.244 0.000 ***
non-rPSC vs GrouprPSC 5.715 0.117
pre_ltx vs Groupnon-rPSC -8.148 0.040 *
pre_ltx vs GrouprPSC -2.433 0.613
results_model <- pairwise.lmer(
  formula = "dys_filtered_genus ~ Group + (1|PatientID)",
  factors=metadata_colon$Group,
  data=metadata_colon)

knitr::kable(results_model[[1]][,c("Estimate", "p.adj","sig")],
             digits=3,caption="Results of linear mixed-effect modeling")
Results of linear mixed-effect modeling
Estimate p.adj sig
healthy vs Groupnon-rPSC 25.829 0.000 ***
healthy vs GrouprPSC 40.137 0.000 ***
healthy vs Grouppre_ltx 37.534 0.000 ***
non-rPSC vs GrouprPSC 14.320 0.001 ***
pre_ltx vs Groupnon-rPSC -11.696 0.007 **
pre_ltx vs GrouprPSC 2.664 0.621
dys <- ggarrange(p_il + ggtitle("Ileum"),
                 p_col + ggtitle("Colon"),nrow = 2)

pdf("results/clinical/dysbiosis_index_boxplots.pdf",
    height = 8,width =15)

dys
dev.off()

ALPHA DIVERSITY

alpha_div_plots <- list()
alpha_boxplots <- list()
corrs <- list()

Ileum

ileum_metadata_alpha_final <- metadata_ileum %>% 
  dplyr::select("SampleID", "PatientID","Group",
                "Observe","Shannon","Simpson","Pielou",
                "dys_filtered_asv", "dys_filtered_genus")
variables <- c("Observe","Shannon","Simpson","Pielou")

for (clinical_variable in variables){
  
  # ILEUM
  # boxplot
  p <- clinical_boxplot(ileum_metadata_alpha_final,
                      variable=clinical_variable)

  alpha_boxplots[[paste0("ileum_",clinical_variable)]] <- p
  
  # correlation
  ## ASV
  level="ASV"
  corr <- clinical_correlation(
    ileum_metadata_alpha_final,
    clinical_variable,
    level)
  
  corrs[[paste0("ileum_asv_",clinical_variable)]] <- corr
  
  p <- clinical_scatter(corr,
                        ileum_metadata_alpha_final,
                        clinical_variable,
                        level)
  
  alpha_div_plots[[paste0("ileum_asv_",clinical_variable)]] <- p

  ## Genus
  level="genus"
  corr <- clinical_correlation(ileum_metadata_alpha_final,
                               clinical_variable,
                               level)
  corrs[[paste0("ileum_genus_",clinical_variable)]] <- corr
  
  p <- clinical_scatter(corr,
                        ileum_metadata_alpha_final,
                        clinical_variable,
                        level)
  
  alpha_div_plots[[paste0("ileum_genus_",clinical_variable)]] <- p
}

Boxplot

alpha_boxplots_ileum <- ggarrange(
  plotlist = alpha_boxplots[grepl("ileum",names(alpha_boxplots))],
  ncol = 4,
  common.legend = TRUE, 
  legend = "right")

alpha_boxplots_ileum

Colon

colon_metadata_alpha_final <- metadata_colon %>% 
  dplyr::select("SampleID", "PatientID", "Group",
                "Observe","Shannon","Simpson","Pielou",
                "dys_filtered_asv","dys_filtered_genus")
variables <- c("Observe","Shannon","Simpson","Pielou")

for (clinical_variable in variables){

  # COLUMN
  p <- clinical_boxplot(colon_metadata_alpha_final,
                        variable=clinical_variable)

  alpha_boxplots[[paste0("colon_",clinical_variable)]] <- p
  
  # correlation
  ## ASV
  level="ASV"
  corr <- clinical_correlation(colon_metadata_alpha_final,
                               clinical_variable,
                               level,
                               segment="colon")
  
  corrs[[paste0("colon_asv_",clinical_variable)]] <- corr
  
  p <- clinical_scatter(corr,
                        colon_metadata_alpha_final,
                        clinical_variable,
                        level)
  
  alpha_div_plots[[paste0("colon_asv_",clinical_variable)]] <- p

  ## Genus
  level="genus"
  corr <- clinical_correlation(colon_metadata_alpha_final,
                               clinical_variable,
                               level,
                               segment="colon")
  
  corrs[[paste0("colon_genus_",clinical_variable)]] <- corr
  
  p <- clinical_scatter(corr,
                        colon_metadata_alpha_final,
                        clinical_variable,
                        level)
  
  alpha_div_plots[[paste0("colon_genus_",clinical_variable)]] <- p
}

Boxplot

alpha_boxplots_colon <- ggarrange(
  plotlist = alpha_boxplots[grepl("colon",names(alpha_boxplots))],
  ncol = 4,
  common.legend = TRUE, 
  legend = "right")

alpha_boxplots_colon

alpha_boxplot <- ggarrange(alpha_boxplots_ileum,
                           alpha_boxplots_colon,
                           nrow = 2)
pdf("results/clinical/alpha_diversity_boxplots.pdf",height = 7,width =17)
alpha_boxplot
dev.off()

Scatterplots

ASV

alpha_dys_asv <- ggarrange(
  plotlist = alpha_div_plots[grepl("asv",names(alpha_div_plots))],
  ncol=4,
  nrow=2,
  common.legend = TRUE,
  legend="right")
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 163 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 163 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 163 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 163 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 163 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 346 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 346 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 346 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 346 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
alpha_dys_asv

Genus

alpha_dys_genus <- ggarrange(
  plotlist = alpha_div_plots[grepl("genus",names(alpha_div_plots))],
  ncol=4,
  nrow=2,
  common.legend = TRUE,
  legend="right")
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 163 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 163 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 163 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 163 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 163 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 346 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 346 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 346 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 346 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
alpha_dys_genus

Saving plots

pdf("results/clinical/alpha_diversity.pdf",height = 8,width =18)
alpha_dys_asv
dev.off()
pdf("results/clinical/alpha_diversity_genus.pdf",height = 8,width =18)
alpha_dys_genus
dev.off()

Linear models

Richness

Ileum

results_model <- pairwise.lm(formula = "Observe ~ Group",
                                     factors=metadata_ileum$Group,
                                     data=metadata_ileum)

knitr::kable(results_model[[1]][,c("Estimate", "p.adj","sig")],
             digits=3,caption="Results of linear modeling")
Results of linear modeling
Estimate p.adj sig
healthy vs Groupnon-rPSC -25.110 0.017 *
healthy vs GrouprPSC -37.475 0.011 *
healthy vs Grouppre_ltx -53.642 0.001 ***
non-rPSC vs GrouprPSC -12.365 0.326
pre_ltx vs Groupnon-rPSC 28.532 0.028 *
pre_ltx vs GrouprPSC 16.167 0.326

Colon

results_model <- pairwise.lmer(
  formula = "Observe ~ Group + (1|Patient)",
  factors=metadata_colon$Group,
  data=metadata_colon %>% mutate(Patient=PatientID))

knitr::kable(results_model[[1]][,c("Estimate", "p.adj","sig")],
             digits=3,caption="Results of linear modeling")
Results of linear modeling
Estimate p.adj sig
healthy vs Groupnon-rPSC -23.271 0.013 *
healthy vs GrouprPSC -40.520 0.003 **
healthy vs Grouppre_ltx -56.232 0.000 ***
non-rPSC vs GrouprPSC -17.144 0.195
pre_ltx vs Groupnon-rPSC 32.962 0.013 *
pre_ltx vs GrouprPSC 15.641 0.339

Shannon

Ileum

results_model <- pairwise.lm(formula = "Shannon ~ Group",
                                     factors=metadata_ileum$Group,
                                     data=metadata_ileum)

knitr::kable(results_model[[1]][,c("Estimate", "p.adj","sig")],
             digits=3,caption="Results of linear modeling")
Results of linear modeling
Estimate p.adj sig
healthy vs Groupnon-rPSC -0.109 0.417
healthy vs GrouprPSC -0.212 0.243
healthy vs Grouppre_ltx -0.505 0.032 *
non-rPSC vs GrouprPSC -0.103 0.460
pre_ltx vs Groupnon-rPSC 0.396 0.040 *
pre_ltx vs GrouprPSC 0.293 0.243

Colon

results_model <- pairwise.lmer(
  formula = "Shannon ~ Group + (1|Patient)",
  factors=metadata_colon$Group,
  data=metadata_colon %>% mutate(Patient=PatientID))

knitr::kable(results_model[[1]][,c("Estimate", "p.adj","sig")],
             digits=3,caption="Results of linear modeling")
Results of linear modeling
Estimate p.adj sig
healthy vs Groupnon-rPSC -0.186 0.112
healthy vs GrouprPSC -0.333 0.015 *
healthy vs Grouppre_ltx -0.585 0.000 ***
non-rPSC vs GrouprPSC -0.147 0.354
pre_ltx vs Groupnon-rPSC 0.397 0.039 *
pre_ltx vs GrouprPSC 0.251 0.289

MDI and clinical variables

variables <- c("ALP","log_ALP",
               "GGT","log_GGT",
               "ALT","log_ALT",
               "AST","log_AST",
               "Bilirubin","log_Bilirubin",
               "INR",
               "Albumin",
               "Platelets",
               "Creatinine","log_Creatinine",
               "FIB4_score",
               "APRI_score",
               "MAYO_PSC",
               "AOM_score",
               "Fecal.calprotectin",
               "Nancy_max",
               "eMAYO",
               "MAYO_dai"
               )
boxplots_plots <- list()
corrs <- list()
clinical_plots <- list()

for (clinical_variable in variables){
  
  if (grepl("log_",clinical_variable)) {
   metadata_for_boxplots[,clinical_variable] <- log(as.numeric(metadata_for_boxplots[,gsub("log_","",clinical_variable)]))
   
   metadata_ileum[,clinical_variable] <- log(as.numeric(metadata_ileum[,gsub("log_","",clinical_variable)]))
   metadata_colon[,clinical_variable] <- log(as.numeric(metadata_colon[,gsub("log_","",clinical_variable)]))
  }
  
  # boxplot
  p <- clinical_boxplot(metadata_for_boxplots,
                      variable=clinical_variable)

  boxplots_plots[[clinical_variable]] <- p
  
  # ILEUM
  # correlation
  
  ## ASV
  level="ASV"
  corr <- clinical_correlation(metadata_ileum,
                               clinical_variable,
                               level)
  
  corrs[[paste0("ileum_asv_",clinical_variable)]] <- corr
  
  p <- clinical_scatter(corr,
                        metadata_ileum,
                        clinical_variable,
                        level)
  clinical_plots[[paste0("ileum_asv_",clinical_variable)]] <- p

  ## Genus
  level="genus"
  corr <- clinical_correlation(metadata_ileum,
                               clinical_variable,
                               level)
  corrs[[paste0("ileum_genus_",clinical_variable)]] <- corr
  
  p <- clinical_scatter(corr,
                        metadata_ileum,
                        clinical_variable,
                        level)
  clinical_plots[[paste0("ileum_genus_",clinical_variable)]] <- p
  
  # correlation
  ## ASV
  level="ASV"
  corr <- clinical_correlation(metadata_colon,
                               clinical_variable,
                               level,
                               segment="colon")
  
  corrs[[paste0("colon_asv_",clinical_variable)]] <- corr
  
  p <- clinical_scatter(corr,
                        metadata_colon,
                        clinical_variable,
                        level)
  
  clinical_plots[[paste0("colon_asv_",clinical_variable)]] <- p

  ## Genus
  level="genus"
  corr <- clinical_correlation(metadata_colon,
                               clinical_variable,
                               level,
                               segment="colon")
  
  corrs[[paste0("colon_genus_",clinical_variable)]] <- corr
  
  p <- clinical_scatter(corr,
                        metadata_colon,
                        clinical_variable,
                        level)
  
  clinical_plots[[paste0("colon_genus_",clinical_variable)]] <- p
}
variables <- c("Nancy_max","eMAYO","MAYO_dai")
for (clinical_variable in variables){
  # CONTINGENCY TABLE
  p <- clinical_contingency(metadata_for_boxplots,
                            clinical_variable)
  boxplots_plots[[clinical_variable]] <- p
  
  # ILEUM
  ## ASV
  p <- clinical_boxplot_categorical(metadata_ileum,
                                    clinical_variable,
                                    level = "ASV")
  clinical_plots[[paste0("ileum_asv_",clinical_variable)]] <- p
  
  ## GENUS
  p <- clinical_boxplot_categorical(metadata_ileum,
                                    clinical_variable,
                                    level = "Genus")
  clinical_plots[[paste0("ileum_genus_",clinical_variable)]] <- p
  
  # COLON
  ## ASV
  p <- clinical_boxplot_categorical(metadata_ileum,
                                    clinical_variable,
                                    level = "ASV")
  clinical_plots[[paste0("colon_asv_",clinical_variable)]] <- p
  
  ## GENUS
  p <- clinical_boxplot_categorical(metadata_ileum,
                                    clinical_variable,
                                    level = "Genus")
  clinical_plots[[paste0("colon_genus_",clinical_variable)]] <- p
}

Boxplots

for (i in 1:length(boxplots_plots)){
  name <- names(boxplots_plots)[i]
  pl <- boxplots_plots[[i]]
  pl <- pl + ggtitle(name)
  boxplots_plots[[i]] <- pl
}
bx <- ggarrange(plotlist = boxplots_plots, ncol = 4,nrow=4)
bx
## $`1`

## 
## $`2`

## 
## attr(,"class")
## [1] "list"      "ggarrange"
pdf("results/clinical/clinical_data_boxplots.pdf",height = 15,width =13)
bx
dev.off()

Clinical plots - MDI ~ clinical variable

for (i in 1:length(clinical_plots)){
  name <- names(clinical_plots)[i]
  pl <- clinical_plots[[i]]
  pl <- pl + ggtitle(name)
  clinical_plots[[i]] <- pl
}

cl_asv <- ggarrange(
  plotlist = clinical_plots[grepl("asv",names(clinical_plots))],
  ncol=4,nrow=8)
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 162 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 342 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 162 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 342 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 161 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 341 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 161 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 341 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 156 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 331 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 156 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 331 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 156 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 332 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 156 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 332 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 162 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 342 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 162 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 342 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 122 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 244 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 153 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 326 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 162 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 340 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 162 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 342 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 162 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 342 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 150 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 319 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 156 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 330 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 20 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 36 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 20 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 34 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 103 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 199 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 103 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 208 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 103 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 208 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 103 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 208 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
cl_genus <- ggarrange(plotlist = clinical_plots[grepl("genus",names(clinical_plots))],
          ncol=4,nrow=8)
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 162 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 342 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 162 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 342 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 161 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 341 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 161 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 341 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 156 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 331 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 156 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 331 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 156 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 332 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 156 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 332 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 162 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 342 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 162 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 342 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 122 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 244 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 153 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 326 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 162 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 340 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 162 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 342 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 162 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 342 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 150 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 319 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 156 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 330 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 20 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 36 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 20 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 34 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 103 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 199 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 103 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 208 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 103 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 208 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 103 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
## Warning in geom_text(aes(x = x_position, y = y_position), label = gsub("\\\\n", : All aesthetics have length 1, but the data has 208 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing a single row.
## `geom_smooth()` using formula = 'y ~ x'
cl_asv
## $`1`

## 
## $`2`

## 
## attr(,"class")
## [1] "list"      "ggarrange"
cl_genus
## $`1`

## 
## $`2`

## 
## attr(,"class")
## [1] "list"      "ggarrange"
pdf("results/clinical/correlation.pdf",height = 22,width =18)
cl_asv
dev.off()
pdf("results/clinical/correlation_genus.pdf",height = 22,width =18)
cl_genus
dev.off()

HEATMAP

ASV level

corrs_ileum <- corrs[grepl("ileum_asv",names(corrs))]
corrs_colon <- corrs[grepl("colon_asv",names(corrs))]
prepared_list <- prepare_for_heatmap(corrs_ileum,corrs_colon)
p_df_sig_mdi <- prepared_list[[1]]
r_df_mdi <- prepared_list[[2]]
r_df_melt_mdi <- melt(r_df_mdi %>% rownames_to_column("SeqID"))
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(r_df_mdi %>%
## rownames_to_column("SeqID")). In the next version, this warning will become an error.
## Using SeqID as id variables
p_df_melt_mdi <- melt(p_df_sig_mdi %>% rownames_to_column("SeqID"),
                      id.vars = c("SeqID"))
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(p_df_sig_mdi %>%
## rownames_to_column("SeqID")). In the next version, this warning will become an error.
r_df_melt_mdi$SeqID <- factor(r_df_melt_mdi$SeqID,
                              levels=unique(r_df_melt_mdi$SeqID))

p_df_melt_mdi$SeqID <- factor(p_df_melt_mdi$SeqID,
                              levels=unique(p_df_melt_mdi$SeqID))

r_df_melt_mdi$variable <- factor(r_df_melt_mdi$variable,
                              levels=rev(unique(r_df_melt_mdi$variable)))

p_df_melt_mdi$variable <- factor(p_df_melt_mdi$variable,
                              levels=rev(unique(p_df_melt_mdi$variable)))

p <- ggplot() + 
  geom_tile(data=r_df_melt_mdi, aes(SeqID, variable, fill= value))  + 
  theme_minimal() + 
  scale_fill_gradient2(name = "Rho", low = "blue", mid = "white", high = "red", midpoint = 0,limits = c(-0.7, 0.7)) + 
  geom_text(data=p_df_melt_mdi,aes(x=SeqID,y=variable,label=value,vjust=0.6)) + 
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) + 
  xlab("") + ylab("")  + 
  ggtitle("MDI")+ 
  theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15))

p

Genus level

corrs_ileum <- corrs[grepl("ileum_genus",names(corrs))]
corrs_colon <- corrs[grepl("colon_genus",names(corrs))]
prepared_list <- prepare_for_heatmap(corrs_ileum,corrs_colon)
p_df_sig_mdi <- prepared_list[[1]]
r_df_mdi <- prepared_list[[2]]
r_df_melt_mdi <- melt(r_df_mdi %>% rownames_to_column("SeqID"))
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(r_df_mdi %>%
## rownames_to_column("SeqID")). In the next version, this warning will become an error.
## Using SeqID as id variables
p_df_melt_mdi <- melt(p_df_sig_mdi %>% rownames_to_column("SeqID"),
                      id.vars = c("SeqID"))
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(p_df_sig_mdi %>%
## rownames_to_column("SeqID")). In the next version, this warning will become an error.
r_df_melt_mdi$SeqID <- factor(r_df_melt_mdi$SeqID,
                              levels=unique(r_df_melt_mdi$SeqID))

p_df_melt_mdi$SeqID <- factor(p_df_melt_mdi$SeqID,
                              levels=unique(p_df_melt_mdi$SeqID))

r_df_melt_mdi$variable <- factor(r_df_melt_mdi$variable,
                              levels=rev(unique(r_df_melt_mdi$variable)))

p_df_melt_mdi$variable <- factor(p_df_melt_mdi$variable,
                              levels=rev(unique(p_df_melt_mdi$variable)))

p_mdi <- ggplot() + 
  geom_tile(data=r_df_melt_mdi, aes(SeqID, variable, fill= value))  + 
  theme_minimal() + 
  scale_fill_gradient2(name = "Rho", low = "blue", mid = "white", high = "red", midpoint = 0,limits = c(-0.7, 0.7)) + 
  geom_text(data=p_df_melt_mdi,aes(x=SeqID,y=variable,label=value,vjust=0.6)) + 
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) + 
  xlab("") + ylab("")  + 
  ggtitle("MDI")+ 
  theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15))

p_mdi

Bacteria and significant clinical variables

Terminal ileum

Genus level

level="genus"

Aggregation, filtering

# Aggregation
genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level=level,
                             names=TRUE)
# Filtration
filt_data <- filtering_steps(genus_data[[1]],
                             genus_data[[2]],
                             ileum_metadata,
                             seq_depth_threshold=10000)
## Removing 3 ASV(s)
filt_ileum_genus_tab <- filt_data[[1]]
filt_ileum_genus_taxa <- filt_data[[2]]
filt_ileum_metadata_genus <- filt_data[[3]]
psc_effect_ileum_genus <- read.xlsx(
  "../results/Q1_czech/univariate_analysis/supplements_psc_effect_terminal_ileum.xlsx",
  sheet = "terminal_ileum genus")
psc_taxa <- psc_effect_ileum_genus$SeqID
increased <- psc_effect_ileum_genus$ASV[psc_effect_ileum_genus$log2FoldChange >0]
decreased <- psc_effect_ileum_genus$ASV[psc_effect_ileum_genus$log2FoldChange <0]

Increased + decreased

my_table <- filt_ileum_genus_tab %>% column_to_rownames("SeqID")
data_clr <- vegan::decostand(my_table,method = "clr", MARGIN = 2,pseudocount=0.5) %>% as.matrix()
data_clr <- data_clr[psc_taxa,] %>% t() %>% as.data.frame() %>% rownames_to_column("SampleID")

metadata_with_abundances <- data_clr %>% full_join(metadata_ileum,by="SampleID")
# significant variables
variables <- c("ALP",
               "GGT",
               "APRI_score", 
               "FIB4_score",
               "Albumin",
               "Platelets")
corrs_ileum <- corrs[grepl("ileum_genus",names(corrs))]
corrs_colon <- corrs[grepl("colon_genus",names(corrs))]
prepared_list <- prepare_for_heatmap(corrs_ileum,corrs_colon)
p_df_sig_mdi <- prepared_list[[1]]

variables <- colnames(p_df_sig_mdi)[!apply(p_df_sig_mdi,2,function(x) {
  all(x == "")})]
corrs <- list()
clinical_plots <- list()

for (clinical_variable in variables){
  for (taxon in psc_taxa){
    if (grepl("log_",clinical_variable)) {
   metadata_with_abundances[,clinical_variable] <- log(as.numeric(metadata_with_abundances[,gsub("log_","",clinical_variable)]))
  }
  
  # ILEUM
  # correlation
  corr <- clinical_correlation_abundances(metadata_with_abundances,clinical_variable,taxon,level="genus")
  corrs[[paste0("ileum_genus_",clinical_variable,"_", taxon)]] <- corr
  if (corr$P <0.05){
    p <- clinical_scatter_abundances(corr,metadata_with_abundances,clinical_variable,taxon,level="genus",size = 3)
  clinical_plots[[paste0("ileum_genus_",clinical_variable,"_",taxon)]] <- p
  } else print(corr$P)
  }
  
}
## [1] 0.1795378
## [1] 0.1546398
## [1] 0.1545929
## [1] 0.8736376
## [1] 0.1211876
## [1] 0.2656531
## [1] 0.7206887
## [1] 0.2736377
## [1] 0.3640954
## [1] 0.1392426
## [1] 0.1462809
## [1] 0.3744141
## [1] 0.07350866
## [1] 0.370275
## [1] 0.05424225
## [1] 0.1027328
## [1] 0.06987345
## [1] 0.05892518
## [1] 0.06594912
## [1] 0.2342706
## [1] 0.1622938
## [1] 0.8614502
## [1] 0.0530168
## [1] 0.3562072
## [1] 0.9746608
## [1] 0.559969
## [1] 0.07848014
## [1] 0.1025848
## [1] 0.4607077
## [1] 0.09592839
## [1] 0.4488182
## [1] 0.7472116
## [1] 0.4482218
## [1] 0.9986045
## [1] 0.2183595
## [1] 0.9121244
## [1] 0.1448566
## [1] 0.1691357
## [1] 0.8578561
## [1] 0.6487299
## [1] 0.7833458
## [1] 0.2263479
prepared_list <- subprepare_for_heatmap(corrs,MDI = FALSE)
p_df_sig <- prepared_list[[1]]
r_df <- prepared_list[[2]]
r_df_melt <- melt(r_df %>% rownames_to_column("SeqID"))
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(r_df %>%
## rownames_to_column("SeqID")). In the next version, this warning will become an error.
## Using SeqID as id variables
p_df_melt <- melt(p_df_sig %>% rownames_to_column("SeqID"),id.vars = c("SeqID"))
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(p_df_sig %>%
## rownames_to_column("SeqID")). In the next version, this warning will become an error.
r_df_melt$variable <- factor(r_df_melt$variable, 
                             levels=rev(unique(r_df_melt$variable)))
p_df_melt$variable <- factor(p_df_melt$variable,
                             levels =rev(unique(p_df_melt$variable)))

r_df_melt$SeqID <- factor(r_df_melt$SeqID, levels = unique(r_df_melt$SeqID))
p_df_melt$SeqID <- factor(p_df_melt$SeqID, levels = unique(p_df_melt$SeqID))

p_ileum <- ggplot() + 
  geom_tile(data=r_df_melt, aes(variable, SeqID, fill= value))  + 
  theme_minimal() + 
  scale_fill_gradient2(name = "Rho", low = "blue", mid = "white", high = "red", midpoint = 0,limits = c(-0.5, 0.5)) + 
  geom_text(data=p_df_melt,aes(x=variable,y=SeqID,label=value,vjust=0.6)) + 
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) + 
  xlab("") + ylab("")  + 
  ggtitle("Terminal ileum")+ 
  theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15))

p_ileum

Colon

Genus level

Aggregation, filtering

genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level="Genus",
                             names=TRUE)

filt_colon_genus_tab <- genus_data[[1]]
filt_colon_genus_taxa <- genus_data[[2]]
filt_colon_metadata <- colon_metadata
psc_effect_colon_genus <- read.xlsx("../results/Q1_czech/univariate_analysis/supplements_psc_effect_colon.xlsx",
                                    sheet = "colon genus")

psc_taxa <- psc_effect_colon_genus$SeqID
increased <- psc_effect_colon_genus$SeqID[psc_effect_colon_genus$log2FoldChange >0]
decreased <- psc_effect_colon_genus$SeqID[psc_effect_colon_genus$log2FoldChange <0]
my_table <- filt_colon_genus_tab %>% column_to_rownames("SeqID")
data_clr <- vegan::decostand(my_table,method = "clr", MARGIN = 2,pseudocount=0.5) %>% as.matrix()
data_clr <- data_clr[psc_taxa,] %>% t() %>% as.data.frame() %>% rownames_to_column("SampleID")

metadata_with_abundances <- data_clr %>% full_join(metadata_colon,by="SampleID")
variables <- c("ALP",
               "GGT",
               "APRI_score", 
               "FIB4_score",
               "Albumin",
               "Platelets")
corrs <- list()
clinical_plots <- list()

for (clinical_variable in variables){
  for (taxon in psc_taxa){
    if (grepl("log_",clinical_variable)) {
   metadata_with_abundances[,clinical_variable] <- log(as.numeric(metadata_with_abundances[,gsub("log_","",clinical_variable)]))
  }
  
  # ILEUM
  # correlation
  corr <- clinical_correlation_abundances(metadata_with_abundances,clinical_variable,taxon,level="genus")
  corrs[[paste0("colon_genus_",clinical_variable,"_", taxon)]] <- corr
  if ( (corr$P == "< 0.05") | (corr$P == "< 0.01") | (corr$P == "< 0.01") ){
    p <- clinical_scatter_abundances(corr,metadata_with_abundances,clinical_variable,taxon,level="genus",size = 3)
  clinical_plots[[paste0("colon_genus_",clinical_variable,"_",taxon)]] <- p
  } else print(corr$P)
}
  

}
## [1] 8.902927e-05
## [1] 0.0005647247
## [1] 0.003039914
## [1] 0.06501386
## [1] 2.738443e-06
## [1] 9.798259e-05
## [1] 0.1554447
## [1] 0.001503334
## [1] 0.805154
## [1] 0.04703198
## [1] 0.1156528
## [1] 0.1030576
## [1] 0.04922751
## [1] 0.009930211
## [1] 0.003050694
## [1] 0.6894357
## [1] 0.002591426
## [1] 3.612922e-07
## [1] 0.1067446
## [1] 0.002122119
## [1] 4.605107e-06
## [1] 0.0005938918
## [1] 0.2693496
## [1] 0.1005732
## [1] 0.0004793599
## [1] 0.005430916
## [1] 0.019736
## [1] 0.0006409276
## [1] 0.0818511
## [1] 0.004261477
## [1] 0.001501122
## [1] 0.06822636
## [1] 0.05750884
## [1] 1.710327e-05
## [1] 0.009648598
## [1] 0.1632994
## [1] 0.0008728726
## [1] 1.872338e-05
## [1] 0.3059893
## [1] 0.002394405
## [1] 0.007417647
## [1] 4.493443e-05
## [1] 0.0004702686
## [1] 0.0002254489
## [1] 0.0001007374
## [1] 0.01128886
## [1] 0.08889771
## [1] 0.6197157
## [1] 0.2613796
## [1] 0.07858962
## [1] 0.5299679
## [1] 0.4290324
## [1] 0.164709
## [1] 0.0005174712
## [1] 0.7602667
## [1] 0.1543431
## [1] 0.0007063679
## [1] 0.04209605
## [1] 0.02932357
## [1] 0.1576378
## [1] 0.6115159
## [1] 0.005511981
## [1] 0.004008966
## [1] 2.864847e-05
## [1] 0.119322
## [1] 0.1893823
## [1] 0.008536685
## [1] 0.7151612
## [1] 0.03595876
## [1] 0.002057776
## [1] 0.2738009
## [1] 0.8535685
## [1] 0.9109102
## [1] 0.06012322
## [1] 0.6505823
## [1] 0.7126095
## [1] 0.4899684
## [1] 0.4528184
## [1] 0.09320952
## [1] 0.5288051
## [1] 6.641559e-08
## [1] 0.0001727573
## [1] 1.253515e-05
## [1] 2.530732e-06
## [1] 1.319317e-06
## [1] 0.0002584068
## [1] 0.006789728
## [1] 2.021499e-06
## [1] 3.704793e-05
## [1] 1.16368e-05
## [1] 0.000531207
## [1] 2.6664e-05
## [1] 0.004563502
## [1] 6.481947e-05
## [1] 1.052578e-05
## [1] 0.0001434732
## [1] 4.345159e-11
## [1] 3.559443e-11
## [1] 0.002081744
## [1] 3.959186e-05
## [1] 0.2993047
## [1] 0.0265046
## [1] 0.005909994
## [1] 0.0002104112
## [1] 0.004395611
## [1] 0.02421451
## [1] 0.8308171
## [1] 0.4389772
## [1] 0.6828513
## [1] 0.0002059141
## [1] 0.8829335
## [1] 0.4547569
## [1] 0.07186342
## [1] 0.01705701
## [1] 0.03607561
## [1] 0.0003387316
## [1] 0.00196421
## [1] 0.1842222
## [1] 0.004339134
## [1] 0.4173678
clinical_plots_colon <- clinical_plots
corrs_colon <- corrs
prepared_list <- subprepare_for_heatmap(corrs,MDI = FALSE)
p_df_sig <- prepared_list[[1]]
r_df <- prepared_list[[2]]
r_df_melt <- melt(r_df %>% rownames_to_column("SeqID"))
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(r_df %>%
## rownames_to_column("SeqID")). In the next version, this warning will become an error.
## Using SeqID as id variables
p_df_melt <- melt(p_df_sig %>% rownames_to_column("SeqID"),id.vars = c("SeqID"))
## Warning: The melt generic in data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is superseded and is no longer actively developed, and
## this redirection is now deprecated. To continue using melt methods from reshape2 while both libraries are
## attached, e.g. melt.list, you can prepend the namespace, i.e. reshape2::melt(p_df_sig %>%
## rownames_to_column("SeqID")). In the next version, this warning will become an error.
r_df_melt$variable <- factor(r_df_melt$variable, 
                             levels=rev(unique(r_df_melt$variable)))
p_df_melt$variable <- factor(p_df_melt$variable,
                             levels =rev(unique(p_df_melt$variable)))

r_df_melt$SeqID <- factor(r_df_melt$SeqID, levels = unique(r_df_melt$SeqID))
p_df_melt$SeqID <- factor(p_df_melt$SeqID, levels = unique(p_df_melt$SeqID))

p_colon <- ggplot() + 
  geom_tile(data=r_df_melt, aes(variable, SeqID, fill= value))  + 
  theme_minimal() + 
  scale_fill_gradient2(name = "Rho", low = "blue", mid = "white", high = "red", midpoint = 0,limits = c(-0.5, 0.5)) + 
  geom_text(data=p_df_melt,aes(x=variable,y=SeqID,label=value,vjust=0.6)) + 
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) + 
  xlab("") + ylab("")  + 
  ggtitle("Colon")+ 
  theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15))

p_colon

heatmap plot

ggarrange(p_mdi,p_ileum,p_colon,ncol = 3,widths = c(0.7,1,1))

PCA and clinical variables fit

Terminal ileum

Analysis

segment="terminal_ileum"

Main analysis - Genus, Aitchison

Genus level, Aitchison distance

level="genus"

Aggregation, filtering

# Aggregation
genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level=level,
                             names=TRUE)
# Filtration
filt_data <- filtering_steps(genus_data[[1]],
                             genus_data[[2]],
                             ileum_metadata,
                             seq_depth_threshold=10000)
## Removing 3 ASV(s)
filt_ileum_genus_tab <- filt_data[[1]]
filt_ileum_genus_taxa <- filt_data[[2]]
filt_ileum_metadata_genus <- filt_data[[3]]

Plots

p <- pca_plot_custom(filt_ileum_genus_tab,
                                 filt_ileum_genus_taxa,
                                 filt_ileum_metadata_genus,
                                 show_boxplots = TRUE,
                                 variable = "Group", size=3, 
                     show_legend=TRUE,
                     clinical = TRUE,
                     clinical_metadata = metadata_cz)


# see the results
p

Supplementary analysis

Genus level

level="genus"
Bray-Curtis

Plots

p <- pca_plot_custom(filt_ileum_genus_tab,
                                 filt_ileum_genus_taxa,
                                 filt_ileum_metadata_genus,
                                 measure = "bray",
                                 show_boxplots = TRUE,
                                 variable = "Group", size=3, show_legend=TRUE,
                     clinical=TRUE,
                     clinical_metadata=metadata_cz)

# see the results
p

Jaccard

Plots

p <- pca_plot_custom(filt_ileum_genus_tab,
                                 filt_ileum_genus_taxa,
                                 filt_ileum_metadata_genus,
                                 measure = "jaccard",
                                 show_boxplots = TRUE,
                                 variable = "Group", size=3, show_legend=TRUE,
                     clinical=TRUE,
                     clinical_metadata = metadata_cz)

# see the results
p

ASV level

level="ASV"
Aitchison

PCoA

p <- pca_plot_custom(filt_ileum_asv_tab,
                           filt_ileum_taxa_tab,
                           filt_ileum_metadata,
                           show_boxplots = TRUE,
                           variable = "Group", size=3, show_legend=TRUE,
                     clinical=TRUE,
                     clinical_metadata = metadata_cz)


# see the results
p

Bray-Curtis

PCoA

p <- pca_plot_custom(filt_ileum_asv_tab,
                     filt_ileum_taxa_tab,
                     filt_ileum_metadata,
                     measure = "bray",
                     show_boxplots = TRUE,
                     variable = "Group", size=3, show_legend=TRUE,
                     clinical=TRUE,
                     clinical_metadata=metadata_cz)

# see the results
p

Jaccard

PCoA

p <- pca_plot_custom(filt_ileum_asv_tab,
                     filt_ileum_taxa_tab,
                     filt_ileum_metadata,
                     measure = "jaccard",
                     show_boxplots = TRUE,
                     variable = "Group", size=3, show_legend=TRUE,
                     clinical=TRUE,
                     clinical_metadata=metadata_cz)

# see the results
p

Colon

segment="colon"

Filtering

Rules: - prevalence > 5% (per group) - nearZeroVar with default settings - sequencing depth > 5000 - taxonomic assignment at least order

Sequencing depth

data_filt <- seq_depth_filtering(colon_asv_tab,
                                 colon_taxa_tab,
                                 colon_metadata,
                                 seq_depth_threshold = 10000)
## Removing 12 ASV(s)
filt_colon_asv_tab <- data_filt[[1]]; alpha_colon_asv_tab <- filt_colon_asv_tab
filt_colon_taxa_tab <- data_filt[[2]]; alpha_colon_taxa_tab <- filt_colon_taxa_tab
filt_colon_metadata <- data_filt[[3]]; alpha_colon_metadata <- filt_colon_metadata

seq_step <- dim(filt_colon_asv_tab)[1]

NearZeroVar

data_filt <- nearzerovar_filtering(filt_colon_asv_tab,
                                   filt_colon_taxa_tab,
                                   filt_colon_metadata)

filt_colon_asv_tab <- data_filt[[1]]
filt_colon_taxa_tab <- data_filt[[2]]
nearzero_step <- dim(filt_colon_asv_tab)[1]

Check zero depth

data_filt <- check_zero_depth(filt_colon_asv_tab, 
                              filt_colon_taxa_tab, 
                              filt_colon_metadata)

filt_colon_asv_tab <- data_filt[[1]]; 
filt_colon_taxa_tab <- data_filt[[2]]; 
filt_colon_metadata <- data_filt[[3]]; 

Calculating Aitchison distance (euclidean distance on clr-transformed data), both at ASV and genus level.

Main analysis - Genus, Aitchison

Genus level, Aitchison distance

level="genus"

Aggregation, filtering

# Aggregation
genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level=level,
                             names=TRUE)
# Filtration
filt_data <- filtering_steps(genus_data[[1]],
                             genus_data[[2]],
                             colon_metadata,
                             seq_depth_threshold=10000)

filt_colon_genus_tab <- filt_data[[1]]
filt_colon_genus_taxa <- filt_data[[2]]
filt_colon_genus_metadata <- filt_data[[3]]

PCoA

p <- pca_plot_custom(filt_colon_genus_tab,
                                 filt_colon_genus_taxa,
                                 filt_colon_genus_metadata,
                                 show_boxplots = TRUE,
                                 variable = "Group", size=3, show_legend=TRUE,
                     clinical=TRUE, clinical_metadata = metadata_cz)

# see the results
p

Supplementary analysis

Genus level

level="genus"
Bray-Curtis

Plots

p <- pca_plot_custom(filt_colon_genus_tab,
                                 filt_colon_genus_taxa,
                                 filt_colon_genus_metadata,
                                 measure = "bray",
                                 show_boxplots = TRUE,
                                 variable = "Group", size=3, show_legend=TRUE,
                     clinical=TRUE, clinical_metadata = metadata_cz)


# see the results
p

Jaccard

Plots

p <- pca_plot_custom(filt_colon_genus_tab,
                                 filt_colon_genus_taxa,
                                 filt_colon_genus_metadata,
                                 measure = "jaccard",
                                 show_boxplots = TRUE,
                                 variable = "Group", size=3, show_legend=TRUE,
                     clinical=TRUE, clinical_metadata = metadata_cz)


# see the results
p

ASV level

level="ASV"
Aitchison

PCoA

p <- pca_plot_custom(filt_colon_asv_tab,
                           filt_colon_taxa_tab,
                           filt_colon_metadata,
                           show_boxplots = TRUE,
                           variable = "Group", size=3, show_legend=TRUE,
                     clinical=TRUE, clinical_metadata = metadata_cz)


# see the results
p

Bray-Curtis

PCoA

p <- pca_plot_custom(filt_colon_asv_tab,
                     filt_colon_taxa_tab,
                     filt_colon_metadata,
                     measure = "bray",
                     show_boxplots = TRUE,
                     variable = "Group", size=3, show_legend=TRUE,
                     clinical=TRUE, clinical_metadata = metadata_cz)

# see the results
p

Jaccard

PCoA

p <- pca_plot_custom(filt_colon_asv_tab,
                     filt_colon_taxa_tab,
                     filt_colon_metadata,
                     measure = "jaccard",
                     show_boxplots = TRUE,
                     variable = "Group", size=3, show_legend=TRUE,
                      clinical=TRUE, clinical_metadata = metadata_cz)

# see the results
p